% Reliability

% mathprompter (arXiv preprint)
% The coined name in the title is brace-protected so that sentence-casing
% styles keep its internal capital letter.
@misc{imani2023mathprompter,
  author        = {Imani, Shima and Du, Liang and Shrivastava, Harsh},
  title         = {{MathPrompter}: Mathematical Reasoning using Large Language Models},
  year          = {2023},
  eprint        = {2303.05398},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% unreliable CoT (arXiv preprint)
% Explanations in few-shot prompting for textual reasoning.
@misc{ye2022unreliability,
  author        = {Ye, Xi and Durrett, Greg},
  title         = {The Unreliability of Explanations in Few-shot Prompting for Textual Reasoning},
  year          = {2022},
  eprint        = {2205.03401},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% analyzing harm (arXiv preprint)
% The model name in the title is brace-protected so sentence-casing styles
% do not lowercase it.
@misc{si2022prompting,
  author        = {Si, Chenglei and Gan, Zhe and Yang, Zhengyuan and Wang, Shuohang
                   and Wang, Jianfeng and Boyd-Graber, Jordan and Wang, Lijuan},
  title         = {Prompting {GPT-3} To Be Reliable},
  year          = {2022},
  eprint        = {2210.09150},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% diverse prompts (arXiv preprint)
% Making language models better reasoners.
@misc{li2022advance,
  author        = {Li, Yifei and Lin, Zeqi and Zhang, Shizhuo and Fu, Qiang
                   and Chen, Bei and Lou, Jian-Guang and Chen, Weizhu},
  title         = {On the Advance of Making Language Models Better Reasoners},
  year          = {2022},
  eprint        = {2206.02336},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% Ask-Me-Anything Prompting (arXiv preprint)
% The accented surname of the last author uses a LaTeX escape so the entry is
% safe under classic 8-bit BibTeX as well as Biber; the keyword list carries
% each keyword once.
@misc{arora2022ama,
  author        = {Arora, Simran and Narayan, Avanika and Chen, Mayee F. and Orr, Laurel
                   and Guha, Neel and Bhatia, Kush and Chami, Ines and Sala, Frederic
                   and R{\'e}, Christopher},
  title         = {Ask Me Anything: A simple strategy for prompting language models},
  keywords      = {Computation and Language (cs.CL), FOS: Computer and information sciences},
  year          = {2022},
  eprint        = {2210.02441},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% problems with biases (arXiv preprint)
% Calibration to improve few-shot performance of language models.
@misc{zhao2021calibrate,
  author        = {Zhao, Tony Z. and Wallace, Eric and Feng, Shi and Klein, Dan and Singh, Sameer},
  title         = {Calibrate Before Use: Improving Few-Shot Performance of Language Models},
  year          = {2021},
  eprint        = {2102.09690},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% augment with search results (arXiv preprint)
% The accented surname of the first author uses a LaTeX escape so the entry is
% safe under classic 8-bit BibTeX as well as Biber. The citation key is kept
% unchanged so existing citations continue to resolve.
@misc{livin2022large,
  author        = {Li{\'e}vin, Valentin and Hother, Christoffer Egeberg and Winther, Ole},
  title         = {Can large language models reason about medical questions?},
  year          = {2022},
  eprint        = {2207.08143},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% enhanced self consistency (arXiv preprint)
% Self-consistency of pre-trained language models via natural language inference.
@misc{mitchell2022enhancing,
  author        = {Mitchell, Eric and Noh, Joseph J. and Li, Siyan and Armstrong, William S.
                   and Agarwal, Ananth and Liu, Patrick and Finn, Chelsea
                   and Manning, Christopher D.},
  title         = {Enhancing Self-Consistency and Performance of Pre-Trained Language Models through Natural Language Inference},
  year          = {2022},
  eprint        = {2211.11875},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% bias in 0 shot CoT (arXiv preprint)
% The subtitle follows an exclamation mark rather than a colon, so its first
% word is brace-protected; otherwise sentence-casing styles would lowercase it.
@misc{shaikh2022second,
  author        = {Shaikh, Omar and Zhang, Hongxin and Held, William and Bernstein, Michael and Yang, Diyi},
  title         = {On Second Thought, Let's Not Think Step by Step! {Bias} and Toxicity in Zero-Shot Reasoning},
  year          = {2022},
  eprint        = {2212.08061},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% pitfalls of evaluating language models (post on twitter.com, see url)
% The month uses the predefined three-letter macro, unbraced, so styles can
% expand and localise it. NOTE(review): day is not a standard BibTeX field and
% is ignored by the standard styles; it is kept here as an annotation.
@misc{chase2022evaluating,
  author = {Chase, Harrison},
  title  = {Evaluating language models can be tricky},
  year   = {2022},
  month  = dec,
  day    = {26},
  url    = {https://twitter.com/hwchase17/status/1607428141106008064},
}

% constitutional (arXiv preprint)
% The acronym in the title is brace-protected against sentence-casing styles.
% Authors are in Last, First form; this matters for the two-word surname
% El Showk, which First-Last form would split into given name and surname.
@misc{bai2022constitutional,
  author        = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda
                   and Kernion, Jackson and Jones, Andy and Chen, Anna and Goldie, Anna
                   and Mirhoseini, Azalia and McKinnon, Cameron and Chen, Carol
                   and Olsson, Catherine and Olah, Christopher and Hernandez, Danny
                   and Drain, Dawn and Ganguli, Deep and Li, Dustin and Tran-Johnson, Eli
                   and Perez, Ethan and Kerr, Jamie and Mueller, Jared and Ladish, Jeffrey
                   and Landau, Joshua and Ndousse, Kamal and Lukosuite, Kamile
                   and Lovitt, Liane and Sellitto, Michael and Elhage, Nelson
                   and Schiefer, Nicholas and Mercado, Noemi and DasSarma, Nova
                   and Lasenby, Robert and Larson, Robin and Ringer, Sam
                   and Johnston, Scott and Kravec, Shauna and El Showk, Sheer
                   and Fort, Stanislav and Lanham, Tamera and Telleen-Lawton, Timothy
                   and Conerly, Tom and Henighan, Tom and Hume, Tristan
                   and Bowman, Samuel R. and Hatfield-Dodds, Zac and Mann, Ben
                   and Amodei, Dario and Joseph, Nicholas and McCandlish, Sam
                   and Brown, Tom and Kaplan, Jared},
  title         = {Constitutional {AI}: Harmlessness from {AI} Feedback},
  year          = {2022},
  eprint        = {2212.08073},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}